# -*- coding: utf-8 -*-
from BeautifulSoup import BeautifulSoup 
import urllib

def cwbl(u):
    """Fetch the page at ``u`` and parse every table located via a marker cell.

    The page is parsed with BeautifulSoup using the gb18030 encoding. Every
    text node matching the marker string held in ``ndu8`` is located, and the
    element three parent levels above it is handed to ``parse_table``.

    Args:
        u: URL passed straight to ``urllib.urlopen``.

    Returns:
        A list with one ``parse_table`` result per marker text node found,
        in document order; empty when the marker does not occur.
    """
    f = urllib.urlopen(u)
    try:
        soup = BeautifulSoup(f, fromEncoding='gb18030')
    finally:
        # Always release the connection, even when parsing fails.
        f.close()

    ndu8 = unicode('年度', 'u8')

    # Presumably text -> td -> tr -> table; verify against the page layout.
    tables = [nd.parent.parent.parent for nd in soup.findAll(text=ndu8)]
    return [parse_table(t) for t in tables]

def parse_table(tb):
    """Convert a table element into a list of rows of cell values.

    Each 'tr' found under ``tb`` yields one inner list, holding one entry
    per 'td' found under that row. A cell's entry is the text returned by
    ``get_info``, or ``None`` when that text starts with '&nbsp'.

    Args:
        tb: element that can be called with a tag name to find descendants.

    Returns:
        A list of lists of cell texts / ``None`` placeholders.
    """
    def cell_value(cell):
        # Cells whose text begins with the '&nbsp' entity count as blank.
        text = get_info(cell)
        return None if text.startswith(unicode('&nbsp')) else text

    return [[cell_value(cell) for cell in tr('td')] for tr in tb('tr')]

def get_info(tag):
    """Return the first non-empty text found by following first children.

    Starting at ``tag``, the node's ``string`` attribute is returned as soon
    as it is truthy. Otherwise the search moves on to the node's first child
    (``contents[0]``) and repeats.

    Args:
        tag: node exposing a ``string`` attribute and, when that is empty,
            a ``contents`` sequence of child nodes.

    Returns:
        The first truthy ``string`` encountered, or an empty string when the
        descent reaches a node that has no children (e.g. an empty cell).
    """
    node = tag
    while True:
        text = node.string
        if text:
            return text
        # A node without children has no text at all; return an empty
        # string instead of failing on contents[0] so callers that apply
        # string methods to the result keep working.
        children = getattr(node, 'contents', None)
        if not children:
            return u''
        node = children[0]
